# 10 Validation / Accuracy Assessment

# Install caret only when it is missing, so that re-running the script does
# not download and reinstall the package every time
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}

# Load required packages
library(terra)    # For spatial raster/vector data
library(sf)       # For vector data handling
library(caret)    # For confusion matrix

# First, let's test the validation workflow with an example dataset.
# Afterwards we will use actual geodata.
#
# The example reproduces this confusion matrix
# (rows = predicted class, columns = reference class):
#
#                 Wetland Savanna Urban Agriculture Water | Row total
#   Wetland            12       3     0           0     1 |        16
#   Savanna             2      28     1           3     0 |        34
#   Urban               0       0    15           2     0 |        17
#   Agriculture         0       2     4          21     0 |        27
#   Water               0       0     0           0    10 |        10
#   Column total       14      33    20          26    11 |       104
#
# Both vectors list the validation points one matrix row at a time, so that
# element i of `reference` and element i of `prediction` describe the same
# point.

# Reference (ground truth) classes from validation points
reference <- factor(c(
  rep("Wetland", 12), rep("Savanna", 3), rep("Water", 1),  # Wetland row
  rep("Wetland", 2), rep("Savanna", 28), rep("Urban", 1),
  rep("Agriculture", 3),                                    # Savanna row
  rep("Urban", 15), rep("Agriculture", 2),                  # Urban row
  rep("Savanna", 2), rep("Urban", 4), rep("Agriculture", 21),  # Agriculture row
  rep("Water", 10)                                          # Water row
))

# Predicted classes (from your classification output): each class is repeated
# once per point in its matrix row (the row totals above)
prediction <- factor(c(
  rep("Wetland", 16),
  rep("Savanna", 34),
  rep("Urban", 17),
  rep("Agriculture", 27),
  rep("Water", 10)
))

# Generate the confusion matrix from the predicted and reference classes
conf_matrix <- confusionMatrix(prediction, reference)

# Print it
print(conf_matrix)

# Extract and show the overall accuracy
oa <- conf_matrix$overall["Accuracy"]
print(round(oa, 3))

# Extract per-class User's Accuracy and Producer's Accuracy
user_accuracy <- conf_matrix$byClass[, "Pos Pred Value"]  # User's Accuracy (Precision)
producer_accuracy <- conf_matrix$byClass[, "Sensitivity"] # Producer's Accuracy (Recall)

# Print all together, this way you get a great summary.
# print() is called explicitly so the table also appears when the script is
# run via source(), where top-level values are not auto-printed.
print(data.frame(
  Class = rownames(conf_matrix$table),
  User_Accuracy = round(user_accuracy, 3),
  Producer_Accuracy = round(producer_accuracy, 3)
))

# Now that you have understood the basics, we can use geodata for this process

# Load your classified raster (e.g., GeoTIFF)
raster <- rast("C:/Users/schul/Documents/Berufliches/Hiwi EOCap4Africa/Exercices Finalised/Geodata/9d Land Cover Classification Practical/Data/LC_QGIS_result.tif")

# Load your validation data (must have class labels as an attribute, e.g., "truth")
validation_points <- st_read("C:/Users/schul/Documents/Berufliches/Hiwi EOCap4Africa/Exercices Finalised/Geodata/11 Validation Accuracy Assessment/ground_truth.shp")  # or .gpkg, etc.

# Stop early with a clear message if the reference attribute is missing;
# otherwise the error would only surface later inside confusionMatrix()
if (!"truth" %in% names(validation_points)) {
  stop(
    "Validation data must contain a 'truth' column with the reference classes.",
    call. = FALSE
  )
}

# Reproject validation points to match the raster CRS
validation_points <- st_transform(validation_points, crs(raster))

# Extract predicted class values from raster at each point location
predicted_values <- terra::extract(raster, vect(validation_points))[, 2]  # [, 2] assumes first col is ID

# Use the 'truth' column from your shapefile as the reference.
# [[ ]] matches the column name exactly, whereas $ allows partial matching.
reference_values <- validation_points[["truth"]]

## Define lookup table to rename predicted codes (raster code -> class name)
class_names <- c("2" = "Forest", "3" = "Urban", "1" = "Water")

# Codes that are missing from the lookup table (including NA values returned
# by the extraction) turn into NA below and are then left out of the confusion
# matrix, so report them instead of dropping them silently
predicted_codes <- as.character(predicted_values)
unmapped <- !(predicted_codes %in% names(class_names))
if (any(unmapped)) {
  warning(
    sum(unmapped), " validation point(s) have a raster value that is not in ",
    "'class_names' and will be excluded from the accuracy assessment.",
    call. = FALSE
  )
}

# Map predicted values to names
prediction_named <- factor(class_names[predicted_codes])

# Reference values already contain names, but convert to factor
reference_named <- factor(reference_values)

# Get combined levels to make sure both have same factor levels
# (confusionMatrix() needs matching levels in both factors)
all_classes <- union(levels(prediction_named), levels(reference_named))
prediction <- factor(prediction_named, levels = all_classes)
reference <- factor(reference_named, levels = all_classes)

# Compare the mapped classes against the reference classes
conf_matrix <- confusionMatrix(data = prediction, reference = reference)

# Show the full caret report
print(conf_matrix)

# Per-class summary: User's Accuracy is taken from "Pos Pred Value" and
# Producer's Accuracy from "Sensitivity", both rounded to two decimals
accuracy_table <- data.frame(
  Class = rownames(conf_matrix[["table"]]),
  User_Accuracy = round(
    conf_matrix[["byClass"]][, "Pos Pred Value"],
    digits = 2
  ),
  Producer_Accuracy = round(
    conf_matrix[["byClass"]][, "Sensitivity"],
    digits = 2
  )
)

print(accuracy_table)
